{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 聚合操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.63887634,  0.35223505,  0.22536245,  0.22927974,  0.54591735,\n",
       "        0.04803576,  0.681302  ,  0.97771345,  0.25922196,  0.6378412 ,\n",
       "        0.05423964,  0.41962974,  0.59911327,  0.04683758,  0.12505498,\n",
       "        0.70019644,  0.80441551,  0.31266538,  0.34631703,  0.54712781,\n",
       "        0.09433605,  0.61481345,  0.53609996,  0.86125847,  0.73258809,\n",
       "        0.28647909,  0.84310738,  0.18473059,  0.65071228,  0.40652579,\n",
       "        0.31241333,  0.54490466,  0.3252323 ,  0.08207719,  0.08672767,\n",
       "        0.27819405,  0.84369013,  0.67503273,  0.37182507,  0.60311559,\n",
       "        0.32498608,  0.70295542,  0.40411346,  0.71166807,  0.89878553,\n",
       "        0.84266199,  0.34141528,  0.18968278,  0.61050794,  0.37215183,\n",
       "        0.85766594,  0.16185218,  0.45669429,  0.59525382,  0.30749706,\n",
       "        0.41806831,  0.61567742,  0.67749548,  0.54010829,  0.04769625,\n",
       "        0.07244345,  0.24649305,  0.99243801,  0.6476653 ,  0.27174724,\n",
       "        0.44355766,  0.26465264,  0.39245519,  0.52819085,  0.76355187,\n",
       "        0.85945758,  0.56966194,  0.6278681 ,  0.04339274,  0.41409823,\n",
       "        0.97445783,  0.88710993,  0.29693675,  0.95673487,  0.80679159,\n",
       "        0.08367462,  0.3443286 ,  0.34132536,  0.01943037,  0.30496223,\n",
       "        0.78837242,  0.98494468,  0.78321666,  0.62122643,  0.15913881,\n",
       "        0.64681515,  0.75006175,  0.45126854,  0.14436954,  0.56214999,\n",
       "        0.90739165,  0.77384847,  0.31662641,  0.2038196 ,  0.59885578])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "L = np.random.random(100)\n",
    "L"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "48.807715829320522"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(L)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "48.807715829320514"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(L) # 效率要比系统自带地sum高很多"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 loops, best of 3: 115 ms per loop\n",
      "1000 loops, best of 3: 723 µs per loop\n"
     ]
    }
   ],
   "source": [
    "big_array = np.random.rand(1000000)\n",
    "%timeit sum(big_array) # 和下面的方法的效率差了100多倍\n",
    "%timeit np.sum(big_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.99999972284945604"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.max(big_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.8567434130528682e-07"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.min(big_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6.8567434130528682e-07"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "big_array.min()  # 用面向对象地方法求最大和最小值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.99999972284945604"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "big_array.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11],\n",
       "       [12, 13, 14, 15]])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.arange(16).reshape(4, -1)  # 有4行就行，-1代表列数自动分配\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "120"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(X) # 求所有元素的和"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([24, 28, 32, 36])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(X, axis=0)  # 求每列元素的和(沿着行的方向)组成一个行向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 6, 22, 38, 54])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(X, axis=1)  # 求每行元素的和(沿着列的方向)组成一个列向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.prod(X)  # X中所有元素的乘积"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "20922789888000"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.prod(X + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.5"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.median(X) # 中位数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1,  1,  2,  2, 10])"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "v = np.array([1, 1, 2, 2, 10])\n",
    "v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.2000000000000002"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(v)  # 平均值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.0"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.median(v) # 中位数，能更好地描述样本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.49893246220088033"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.percentile(big_array, q = 50) # 矩阵big_array中50%的值都是小于下面的结果地,不写q默认也是50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.99999972284945604"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.percentile(big_array, q = 100) # 矩阵big_array中100%的值都是小于下面的结果地"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6.85674341305e-07\n",
      "0.249347213266\n",
      "0.498932462201\n",
      "0.749311031619\n",
      "0.999999722849\n"
     ]
    }
   ],
   "source": [
    "for percent in [0, 25, 50, 75, 100]:\n",
    "    print(np.percentile(big_array, q = percent))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.083348722076812751"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.var(big_array)  # 数组的方差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.28870178744997882"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(big_array) # 标准差"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "x = np.random.normal(0, 1, size=1000000)  # 产生100万个均值为0，标准差为1的随机数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.00039331062524494009"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(x) # 均值非常接近0了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0003192662733769"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(x) # 标准差，果然非常接近1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
